################################################################
# Script: Nederlanderschap helpt migranten op huizenmarkt      #
# Floris Peters, Maarten Vink & Hans Schmeets                  #
# Economisch Statistische Berichten                            #
################################################################


################
# Introduction #
################

# This file provides the syntax used to create all the tables and figures in the paper. 
# The dataset contains sensitive, micro level information. As such, for privacy reasons the data is only available to individuals employed at or affiliated to Statistics Netherlands. 
# The dataset can be found at the following location on the network of Statistics Netherlands: \\cbsp.nl\Productie\Projecten\SAL\209253UM_FP_SEC1\Werk\Floris\PhD\ESB_housing 

#######################################################################################################################
#######################################################################################################################

#############
# Variables #
#############

# ID
# (Individual identification number)

# START
# (Time vector - start of a given time period)

# STOP
# (Time vector - end of a given time period)

# EVENT
# (Homeownership)
# [0] The event does not occur during this time period; 
# [1] The event occurs at the end of this time period

# HOMEOWNERSHIP
# (is an individual a homeowner?)
# [0] no; 
# [1] yes

# YSM
# (years since migration)

# NATURALISED
# (Naturalised - time-varying)
# [0] Not naturalised; 
# [1] Naturalised

# NATURALISATION
# (Naturalisation during the observation period)
# [0] No naturalisation during the observation period; 
# [1] Naturalisation during the observation period

# IMMIGRATIONYEAR
# (Year of first immigration to the Netherlands)

# GENDER
# [0] Female; 
# [1] Male

# AGEARRIVAL
# (Age at the moment of migration in years)

# AGEARRIVAL_SQR
# (Age at the moment of migration in years, squared)

# PARTNER
# [1] No partner; 
# [2] Foreign-born foreign partner; 
# [3] Foreign born naturalised partner; 
# [4] native partner

# NATIVE_PARTNER
# [0] No native partner; 
# [1] Native partner

# PARTNER_ID
# (Partner identification number)

# CHILDREC
# (Children in the household in categories)
# [1] Children <18 in household; 
# [2] no children <18 in household

# INCOME_HH
# (CPI adjusted log disposable household income)

# EMPLOYMENT
# (Employment status)
# [0] Not employed; 
# [1] Employed

# EMPLOYMENT_DUR
# (Duration of employment)

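# DEVELOPMENT
# (Human Development Index (HDI) score origin country; note that the models below use DEVELOPMENTREC, presumably a recoded version of this variable - TODO confirm)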

# CULT_DIST
# (level of cultural distance between the origin country and the Netherlands, based on the Hofstede index)
    
# EU
# [0] Not EU country of origin; 
# [1] EU country of origin

# POST_2008 (referenced as POST2008 in the models below - TODO confirm the column name in the dataset)
# [0] observation year <= 2008; 
# [1] observation year > 2008

#######################################################################################################################
#######################################################################################################################


#Upload the dataset (Data_cit_housing_main.sav) to R and load the necessary libraries#
library(Matrix)
library(optimx)
library(splines)
library(foreign)
library(dplyr)
library(plm)
library(survival)
# Read the main dataset from a file picked interactively. The file is parsed
# as semicolon-separated text with a header row.
# NOTE(review): the comment above names a .sav file, but read.csv() expects a
# delimited text export - confirm which file is meant to be selected here.
dataset_main <- read.csv(file.choose(), header = TRUE, sep = ";")


###########     
# Table 1 #
###########   

# Keep only the rows where EMPLOYMENT == 1 (employed immigrants).
dataset_empl <- subset(dataset_main, EMPLOYMENT == 1)

# Build the start-stop survival response from START, STOP and EVENT and store
# it as a column so that both models below share the same outcome.
# Surv() and coxph() are provided by the survival package, which has to be
# attached with library(survival) before this section runs.
dataset_empl$surv_table_1_empl <- Surv(
  time = dataset_empl$START,
  time2 = dataset_empl$STOP,
  event = dataset_empl$EVENT
)

# Cox regression, model 1: main effects of NATURALISED and NATURALISATION
# plus the control variables.
table_1_model1 <- coxph(
  surv_table_1_empl ~ NATURALISED + NATURALISATION + as.factor(GENDER) +
    AGEARRIVAL + AGEARRIVAL_SQR + as.factor(PARTNER) + CHILDREC + INCOME_HH +
    EMPLOYMENT_DUR + DEVELOPMENTREC + EU + POST2008,
  data = dataset_empl
)

# Cox regression, model 2: same controls, with the NATURALISATION main effect
# replaced by the NATURALISATION:NATIVE_PARTNER interaction.
# NOTE(review): NATURALISATION enters only through the interaction here (no
# main effect) - confirm this is the intended specification.
table_1_model2 <- coxph(
  surv_table_1_empl ~ NATURALISED + NATURALISATION:NATIVE_PARTNER +
    as.factor(GENDER) + AGEARRIVAL + AGEARRIVAL_SQR + as.factor(PARTNER) +
    CHILDREC + INCOME_HH + EMPLOYMENT_DUR + DEVELOPMENTREC + EU + POST2008,
  data = dataset_empl
)


